import jieba
from collections import Counter
import pandas as pd

def tokenize(sentence, cut=None):
    """Split *sentence* into word tokens, dropping whitespace-only tokens.

    Args:
        sentence: The text to segment.
        cut: Callable mapping a string to a list of tokens. Defaults to
            ``jieba.lcut``.

    Returns:
        A list of the non-blank tokens in their original order.
    """
    if cut is None:
        cut = jieba.lcut
    # The segmenter may emit separators such as ' ' as tokens; they are not
    # words and must not end up as vocabulary entries.
    return [token for token in cut(sentence) if token.strip()]


def build_count_matrix(tokenized_sentences):
    """Build a bag-of-words count matrix from already-tokenized sentences.

    Args:
        tokenized_sentences: A sequence of token lists, one per sentence.

    Returns:
        A ``(vocabulary, rows)`` tuple. ``vocabulary`` is the sorted list of
        distinct tokens; ``rows[i][j]`` is the number of times
        ``vocabulary[j]`` occurs in sentence ``i``.
    """
    # Sort the vocabulary so the column order is reproducible across runs
    # (iteration order of a set of strings is not).
    vocabulary = sorted(
        {token for tokens in tokenized_sentences for token in tokens}
    )
    word_2_idx = {word: idx for idx, word in enumerate(vocabulary)}

    rows = []
    for tokens in tokenized_sentences:
        row = [0] * len(vocabulary)
        for word, count in Counter(tokens).items():
            row[word_2_idx[word]] = count
        rows.append(row)
    return vocabulary, rows


if __name__ == '__main__':
    sentence1 = 'this is a dog'
    sentence2 = 'there are three cats'
    sentence3 = 'this is a dog that is blank'

    sentence_list = [sentence1, sentence2, sentence3]

    # Tokenize each sentence once; the vocabulary and the counts are both
    # derived from the same token lists, so every counted word has an index.
    tokenized = [tokenize(sentence) for sentence in sentence_list]

    # Word-frequency statistics: one count vector per sentence.
    vocabulary, cob_list = build_count_matrix(tokenized)

    # Display the count matrix with pandas, one column per vocabulary word.
    data = pd.DataFrame(cob_list, columns=vocabulary)
    print(data)

